From e5e9ce51999838451bf4854a533a1bca25455133 Mon Sep 17 00:00:00 2001
From: "akw27@boulderdash.cl.cam.ac.uk" <akw27@boulderdash.cl.cam.ac.uk>
Date: Wed, 5 Feb 2003 13:40:29 +0000
Subject: [PATCH] bitkeeper revision 1.15.1.13 (3e41144dWc5GH88F3idrXT41kpovhQ)

Zero copy RX path is working with guest-allocated page pool.
---
 xen-2.4.16/common/domain.c                    |   4 +-
 xen-2.4.16/common/event.c                     |   4 +-
 xen-2.4.16/common/network.c                   |   2 +-
 xen-2.4.16/include/hypervisor-ifs/network.h   |   1 +
 xen-2.4.16/include/xeno/vif.h                 |   2 +-
 xen-2.4.16/net/dev.c                          | 247 ++++++++++++++++--
 xen-2.4.16/net/skbuff.c                       |  13 +-
 .../arch/xeno/drivers/network/network.c       |  12 +-
 xenolinux-2.4.16-sparse/include/asm-xeno/io.h |  18 +-
 xenolinux-2.4.16-sparse/net/core/skbuff.c     |  30 ++-
 10 files changed, 276 insertions(+), 57 deletions(-)

diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c
index fc71bad3ef..520a9a8511 100644
--- a/xen-2.4.16/common/domain.c
+++ b/xen-2.4.16/common/domain.c
@@ -402,8 +402,8 @@ extern module_t *mod;
 extern unsigned char *cmdline;
 int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
 {
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
 #define ALLOC_FRAME_FROM_DOMAIN() (alloc_address -= PAGE_SIZE)
     char *src, *dst;
     int i, dom = p->domain;
diff --git a/xen-2.4.16/common/event.c b/xen-2.4.16/common/event.c
index 6a81c63f8b..3e7c416c29 100644
--- a/xen-2.4.16/common/event.c
+++ b/xen-2.4.16/common/event.c
@@ -14,13 +14,13 @@
 typedef void (*hyp_event_callback_fn_t)(void);
 
 extern void schedule(void);
-extern void flush_rx_queue(void);
+extern void update_shared_ring(void);
 
 /* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
 static hyp_event_callback_fn_t event_call_fn[] = 
 {
     schedule,
-    flush_rx_queue,
+    update_shared_ring,
     kill_domain
 };
 
diff --git a/xen-2.4.16/common/network.c b/xen-2.4.16/common/network.c
index df4cf04262..91d1f66315 100644
--- a/xen-2.4.16/common/network.c
+++ b/xen-2.4.16/common/network.c
@@ -78,7 +78,7 @@ net_vif_t *create_net_vif(int domain)
     if ((shadow_ring->tx_ring == NULL) || (shadow_ring->rx_ring == NULL))
             goto fail;
 
-    shadow_ring->rx_prod = 0;
+    shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
     
     // fill in the new vif struct.
     
diff --git a/xen-2.4.16/include/hypervisor-ifs/network.h b/xen-2.4.16/include/hypervisor-ifs/network.h
index 182fd655fc..f3f13dc77b 100644
--- a/xen-2.4.16/include/hypervisor-ifs/network.h
+++ b/xen-2.4.16/include/hypervisor-ifs/network.h
@@ -119,5 +119,6 @@ int add_net_rule(net_rule_t *rule);
 
 #define RING_STATUS_OK               0  // Everything is gravy.
 #define RING_STATUS_ERR_CFU         -1  // Copy from user problems.
+#define RING_STATUS_BAD_PAGE        -2  // What they gave us was pure evil.
 
 #endif
diff --git a/xen-2.4.16/include/xeno/vif.h b/xen-2.4.16/include/xeno/vif.h
index a83ae515a3..761188be74 100644
--- a/xen-2.4.16/include/xeno/vif.h
+++ b/xen-2.4.16/include/xeno/vif.h
@@ -42,7 +42,7 @@ typedef struct rx_shadow_entry_st {
 typedef struct net_shadow_ring_st {
     tx_shadow_entry_t *tx_ring;
     rx_shadow_entry_t *rx_ring;
-    unsigned int rx_prod;  // trying to add shadow pointers only as I need to.
+    unsigned int rx_prod, rx_cons, rx_idx;
 } net_shadow_ring_t;
 
 typedef struct net_vif_st {
diff --git a/xen-2.4.16/net/dev.c b/xen-2.4.16/net/dev.c
index 749d50aa3f..fa7f6e197d 100644
--- a/xen-2.4.16/net/dev.c
+++ b/xen-2.4.16/net/dev.c
@@ -31,6 +31,7 @@
 
 #include <linux/event.h>
 #include <asm/domain_page.h>
+#include <asm/pgalloc.h>
 
 #define BUG_TRAP ASSERT
 #define notifier_call_chain(_a,_b,_c) ((void)0)
@@ -39,6 +40,12 @@
 #define rtnl_unlock() ((void)0)
 #define dst_init() ((void)0)
 
+// Ring defines:
+#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
+#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
+#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
+#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
+
 struct net_device *the_dev = NULL;
 
 /*
@@ -48,11 +55,11 @@ struct net_device *the_dev = NULL;
 struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
 
 
-/*****************************************************************************************
+/*********************************************************************************
 
 			    Device Interface Subroutines
 
-******************************************************************************************/
+**********************************************************************************/
 
 /**
  *	__dev_get_by_name	- find a device by its name 
@@ -662,7 +669,83 @@ static void get_sample_stats(int cpu)
 	softnet_data[cpu].avg_blog = avg_blog;
 }
 
+/*
+ * deliver_packet - give a zero-copy RX skb to a guest by exchanging frames.
+ * @skb: received buffer; anything but SKB_ZERO_COPY is ignored.
+ * @vif: interface whose shadow RX ring supplies the guest's buffer.
+ *
+ * Picks the first RING_STATUS_OK shadow entry in [rx_cons, rx_prod), repoints
+ * the PTE mapped from rx->addr at the skb's frame and parks the frame that
+ * PTE used to name in skb->pf. With no usable entry only the MAC rewrite
+ * below takes effect.
+ */
+void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
+{
+        net_shadow_ring_t *shadow_ring;
+        rx_shadow_entry_t *rx;
+        unsigned long *g_pte, tmp;
+        struct pfn_info *g_pfn, *h_pfn;
+        unsigned int i;
+
+        if (skb->skb_type != SKB_ZERO_COPY) 
+            return;
+        /*
+         * Zero the destination MAC and, for ARP, the ETH_ALEN bytes at
+         * offset 18 of the network header. The virtual MAC is meant to be
+         * 00-[nn]-00-00-00-00 with [nn] the vif's id, but the id is not
+         * written here, so the address is left all zero.
+         *
+         * In zero copy, the data pointers for the packet have to have been 
+         * mapped in by the caller.
+         */
 
+        memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
+        if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
+        {
+            memset(skb->nh.raw + 18, 0, ETH_ALEN);
+        }
+        shadow_ring = vif->shadow_ring;
+        /* Skip entries not marked OK; the loop body is intentionally empty. */
+        for (i = shadow_ring->rx_cons; 
+             (i != shadow_ring->rx_prod) 
+             && ( shadow_ring->rx_ring[i].status != RING_STATUS_OK );
+             i = RX_RING_INC(i));
+        if (( i != shadow_ring->rx_prod ) &&
+            ( shadow_ring->rx_ring[i].status == RING_STATUS_OK ))
+        {
+            rx = shadow_ring->rx_ring+i;
+            /* Clamp the entry's size down to skb->len + ETH_HLEN. */
+            if ( (skb->len + ETH_HLEN) < rx->size )
+                rx->size = skb->len + ETH_HLEN;
+            /* NOTE(review): presumably an unchanged count means the stale
+             * mapping may still sit in a TLB since it was recorded -- confirm. */
+            if (rx->flush_count == tlb_flush_count[smp_processor_id()])
+                flush_tlb_all();
+            g_pte = map_domain_mem(rx->addr);
+            g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
+            h_pfn = skb->pf;
+            /* Exchange next/prev; flags are swapped too but overwritten below. */
+            tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
+            tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;
+            tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp;
+            h_pfn->tot_count = 1;
+            h_pfn->type_count = g_pfn->type_count;
+            g_pfn->tot_count = g_pfn->type_count = 0;
+            h_pfn->flags = current->domain | PGT_l1_page_table;
+            g_pfn->flags = PGT_l1_page_table;
+            /* Replace the PAGE_MASK bits of the PTE with h_pfn's address. */
+            *g_pte = (*g_pte & ~PAGE_MASK) 
+                | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
+            *g_pte |= _PAGE_PRESENT;
+            unmap_domain_mem(g_pte);
+            skb->pf = g_pfn; // return the guest pfn to be put on the free list
+            shadow_ring->rx_cons = RX_RING_INC(i);
+        }
+}
+
+/* Deliver skb to an old protocol, which is not threaded well
+   or which do not understand shared skbs.
+ */
 /**
  *	netif_rx	-	post buffer to the network code
  *	@skb: buffer to post
@@ -687,12 +770,15 @@ int netif_rx(struct sk_buff *skb)
 #ifdef CONFIG_SMP
         unsigned long cpu_mask;
 #endif
+        
         struct task_struct *p;
 	int this_cpu = smp_processor_id();
 	struct softnet_data *queue;
 	unsigned long flags;
         net_vif_t *vif;
 
+	local_irq_save(flags);
+        
 	if (skb->stamp.tv_sec == 0)
 		get_fast_time(&skb->stamp);
 
@@ -709,14 +795,13 @@ int netif_rx(struct sk_buff *skb)
                 skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this.
                 skb->mac.raw = skb->data;
                 skb->data += ETH_HLEN;
+                skb->nh.raw = skb->data;
         }
         
 	/* The code is rearranged so that the path is the most
 	   short when CPU is congested, but is still operating.
 	 */
 	queue = &softnet_data[this_cpu];
-
-	local_irq_save(flags);
         
 	netdev_rx_stat[this_cpu].total++;
 
@@ -749,7 +834,7 @@ int netif_rx(struct sk_buff *skb)
             do {
                 if ( p->domain != vif->domain ) continue;
                 if ( vif->skb_list.qlen > 100 ) break;
-                skb_queue_tail(&vif->skb_list, skb);
+                deliver_packet(skb, vif);
                 cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
                 read_unlock(&tasklist_lock);
                 goto found;
@@ -761,28 +846,24 @@ int netif_rx(struct sk_buff *skb)
 
 drop:
 	netdev_rx_stat[this_cpu].dropped++;
-	local_irq_restore(flags);
-
         if (skb->skb_type == SKB_ZERO_COPY)
                 unmap_domain_mem(skb->head);
-        
 	kfree_skb(skb);
+        local_irq_restore(flags);
 	return NET_RX_DROP;
 
 found:
         if (skb->skb_type == SKB_ZERO_COPY) {
                 unmap_domain_mem(skb->head);
-                //skb->head = (u8 *)((skb->pf - frame_table) << PAGE_SHIFT);
                 skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
         }
+        kfree_skb(skb);
         hyp_event_notify(cpu_mask);
         local_irq_restore(flags);
         return 0;
 }
 
-/* Deliver skb to an old protocol, which is not threaded well
-   or which do not understand shared skbs.
- */
+
 static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
 {
 	static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
@@ -897,7 +978,35 @@ static inline void handle_diverter(struct sk_buff *skb)
 }
 #endif   /* CONFIG_NET_DIVERT */
 
+/* Clears _HYP_EVENT_NET_RX, then for each vif of 'current' copies shadow RX
+ * entries [rx_idx, rx_cons) into the guest ring while it has room, setting the
+ * vif's RX event at rx_event. NOTE(review): copy_to_user result is unchecked. */
+void update_shared_ring(void)
+{
+    rx_shadow_entry_t *rx;
+    shared_info_t *s = current->shared_info;
+    net_ring_t *net_ring;
+    net_shadow_ring_t *shadow_ring;
+    unsigned int nvif;
 
+    clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
+    for (nvif = 0; nvif < current->num_net_vifs; nvif++)
+    {
+        net_ring = current->net_vif_list[nvif]->net_ring;
+        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
+        while ((shadow_ring->rx_idx != shadow_ring->rx_cons) 
+                && (net_ring->rx_cons != net_ring->rx_prod))
+        {
+            rx = shadow_ring->rx_ring+shadow_ring->rx_idx;
+            copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx, sizeof(rx_entry_t));
+            shadow_ring->rx_idx = RX_RING_INC(shadow_ring->rx_idx);
+            net_ring->rx_cons   = RX_RING_INC(net_ring->rx_cons);
+            if ( net_ring->rx_cons == net_ring->rx_event )
+                set_bit(_EVENT_NET_RX_FOR_VIF(nvif), &s->events);
+        }
+    }
+}
+            
 void flush_rx_queue(void)
 {
     struct sk_buff *skb;
@@ -906,6 +1015,8 @@ void flush_rx_queue(void)
     net_shadow_ring_t *shadow_ring;
     unsigned int i, nvif;
     rx_shadow_entry_t *rx;
+    unsigned long *g_pte, tmp;
+    struct pfn_info *g_pfn, *h_pfn;
     
     /* I have changed this to batch flush all vifs for a guest
      * at once, whenever this is called.  Since the guest is about to be
@@ -918,7 +1029,6 @@ void flush_rx_queue(void)
      * loop can be replaced with a translation to the specific NET 
      * interrupt to serve. --akw
      */
-    
     clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
 
     for (nvif = 0; nvif < current->num_net_vifs; nvif++)
@@ -928,6 +1038,8 @@ void flush_rx_queue(void)
         while ( (skb = skb_dequeue(&current->net_vif_list[nvif]->skb_list)) 
                         != NULL )
         {
+            //temporary hack to stop processing non-zc skbs.
+            if (skb->skb_type == SKB_NORMAL) continue;
             /*
              * Write the virtual MAC address into the destination field
              * of the ethernet packet. Furthermore, do the same for ARP
@@ -938,6 +1050,16 @@ void flush_rx_queue(void)
              * second sixteen bits, which are the per-host vif id.
              * (so eth0 should be 00-00-..., eth1 is 00-01-...)
              */
+            
+            if (skb->skb_type == SKB_ZERO_COPY)
+            {
+                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
+                skb->data = skb->head;
+                skb_reserve(skb,16); 
+                skb->mac.raw = skb->data;
+                skb->data += ETH_HLEN;
+            }
+            
             memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
             *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
             if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
@@ -946,9 +1068,15 @@ void flush_rx_queue(void)
                 *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
             }
 
+            if (skb->skb_type == SKB_ZERO_COPY)
+            {
+                unmap_domain_mem(skb->head);
+            }
+
             i = net_ring->rx_cons;
             if ( i != net_ring->rx_prod )
             {
+                net_ring->rx_ring[i].status = shadow_ring->rx_ring[i].status;
                 if ( shadow_ring->rx_ring[i].status == RING_STATUS_OK)
                 {
                     rx = shadow_ring->rx_ring+i;
@@ -959,7 +1087,7 @@ void flush_rx_queue(void)
                      * replaced with a page swizzle.
                      */
 
-                    if (skb->skb_type == SKB_ZERO_COPY)
+                    /*if (skb->skb_type == SKB_ZERO_COPY)
                     {
                         skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
                         skb->data = skb->head;
@@ -975,7 +1103,49 @@ void flush_rx_queue(void)
                     {
                         unmap_domain_mem(skb->head);
                         skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
+                    }*/
+
+                    //presumably I don't need to rewalk the guest page table
+                    //here.
+                    if (skb->skb_type == SKB_ZERO_COPY) 
+                    {
+                        // g_pfn is the frame FROM the guest being given up
+                        // h_pfn is the frame FROM the hypervisor, passing up.
+                        
+                        if (rx->flush_count == tlb_flush_count[smp_processor_id()])
+                        {
+                            flush_tlb_all();
+                        }
+                        
+                        g_pte = map_domain_mem(rx->addr);
+                        
+                        //g_pfn = frame_table + (rx->addr >> PAGE_SHIFT);
+                        g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
+                        h_pfn = skb->pf;
+
+
+                        tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
+                        tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;
+                        tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp;
+                        
+                        h_pfn->tot_count = 1;
+                        h_pfn->type_count = g_pfn->type_count;
+                        g_pfn->tot_count = g_pfn->type_count = 0;
+                        
+                        h_pfn->flags = current->domain | PGT_l1_page_table;
+                        g_pfn->flags = PGT_l1_page_table;
+
+
+                        *g_pte = (*g_pte & ~PAGE_MASK) | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
+
+                        *g_pte |= _PAGE_PRESENT;
+                        unmap_domain_mem(g_pte);
+
+                        skb->pf = g_pfn; // return the guest pfn to be put on the free list
+                    } else {
+                        BUG(); //got a non-zero copy skb.  which is not good.
                     }
+                    
                 }
                 net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
                 if ( net_ring->rx_cons == net_ring->rx_event )
@@ -1963,10 +2133,7 @@ int __init net_dev_init(void)
  * Called from guest OS to notify updates to its transmit and/or receive
  * descriptor rings.
  */
-#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
-#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
-#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
-#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
+
 long do_net_update(void)
 {
     shared_info_t *shared = current->shared_info;    
@@ -1976,7 +2143,12 @@ long do_net_update(void)
     unsigned int i, j;
     struct sk_buff *skb;
     tx_entry_t tx;
-
+    rx_shadow_entry_t *rx;
+    unsigned long pfn;
+    struct pfn_info *page;
+    unsigned long *g_pte;
+    
+    
     for ( j = 0; j < current->num_net_vifs; j++)
     {
         current_vif = current->net_vif_list[j];
@@ -2034,6 +2206,7 @@ long do_net_update(void)
                 net_get_target_vif(skb);
                 if ( skb->dst_vif > VIF_PHYSICAL_INTERFACE )
                 {
+printk("LOCAL DELIVERY!\n");
                     (void)netif_rx(skb);
                 }
                 else if ( skb->dst_vif == VIF_PHYSICAL_INTERFACE )
@@ -2051,32 +2224,48 @@ long do_net_update(void)
         net_ring->tx_cons = i;
 
         /* Next, pull any new RX descriptors across to the shadow ring.
-         * Note that in the next revision, these will reference PTEs and the
-         * code here will have to validate reference and flush counts, copy the 
-         * descriptor, change the ownership to dom0 and invalidate the client's
-         * version of the page.
          */
     
         shadow_ring = current_vif->shadow_ring;
 
-        for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = TX_RING_INC(i))
+        for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = RX_RING_INC(i))
         {
-            /* This copy assumes that rx_shadow_entry_t is an extension of rx_net_entry_t
-             * extra fields must be tacked on to the end.
+            /* This copy assumes that rx_shadow_entry_t is an extension of 
+             * rx_net_entry_t extra fields must be tacked on to the end.
              */
-            
             if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i, 
                                  sizeof (rx_entry_t) ) )
             {
                 shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
+                continue;
             } else {
+                    
+                rx = shadow_ring->rx_ring + i;
+                pfn = rx->addr >> PAGE_SHIFT;
+                page = frame_table + pfn;
+                
                 shadow_ring->rx_ring[i].status = RING_STATUS_OK;
+                /* Only a PTE in one of this domain's own L1 tables may be
+                 * mapped or modified; rejected buffers are left untouched. */
+                if  (!(page->flags & PGT_l1_page_table) 
+                    || !((page->flags & PG_domain_mask) == current->domain))
+                {
+                    rx->status = RING_STATUS_BAD_PAGE;
+                    continue;
+                }
+                g_pte = map_domain_mem(rx->addr);
+                if (!(*g_pte & _PAGE_PRESENT) ||
+                    (frame_table[*g_pte >> PAGE_SHIFT].tot_count != 1)) {
+                    rx->status = RING_STATUS_BAD_PAGE;
+                } else {
+                    *g_pte &= ~_PAGE_PRESENT;
+                    rx->flush_count = tlb_flush_count[smp_processor_id()];
+                }
+                unmap_domain_mem(g_pte);
             }
         }
-
         shadow_ring->rx_prod = net_ring->rx_prod;
     }
-
     return 0;
 }
 
diff --git a/xen-2.4.16/net/skbuff.c b/xen-2.4.16/net/skbuff.c
index abd2c73ed1..467e161090 100644
--- a/xen-2.4.16/net/skbuff.c
+++ b/xen-2.4.16/net/skbuff.c
@@ -180,8 +180,8 @@ static inline void dealloc_skb_data_page(struct sk_buff *skb)
         pf = skb->pf;
 
         spin_lock_irqsave(&free_list_lock, flags);
-
-        list_add_tail(&pf->list, &free_list);
+        
+        list_add(&pf->list, &free_list);
         free_pfns++;
 
         spin_unlock_irqrestore(&free_list_lock, flags);
@@ -213,6 +213,7 @@ struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
         /* Get the DATA. Size must match skb_add_mtu(). */
         size = SKB_DATA_ALIGN(size);
         data = alloc_skb_data_page(skb);
+
         if (data == NULL)
                 goto nodata;
 
@@ -237,6 +238,7 @@ struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->frag_list = NULL;
+
         return skb;
 
 nodata:
@@ -381,6 +383,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 
 static void skb_release_data(struct sk_buff *skb)
 {
+
 	if (!skb->cloned ||
 	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
 		if (skb_shinfo(skb)->nr_frags) {
@@ -394,10 +397,9 @@ static void skb_release_data(struct sk_buff *skb)
 
                 if (skb->skb_type == SKB_NORMAL) {
 		    kfree(skb->head);
-                } else if (skb->skb_type == SKB_ZERO_COPY) {
-                    dealloc_skb_data_page(skb);
+                } else if (skb->skb_type == SKB_ZERO_COPY) {                    dealloc_skb_data_page(skb);
                 } else {
-                    printk("skb_release_data called with unknown skb type!\n");
+                    BUG(); //skb_release_data called with unknown skb type!
                 }
 	}
 }
@@ -436,6 +438,7 @@ void __kfree_skb(struct sk_buff *skb)
 		}
 		skb->destructor(skb);
 	}
+
 #ifdef CONFIG_NETFILTER
 	nf_conntrack_put(skb->nfct);
 #endif
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c
index b018e29e49..9c40ad0066 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c
@@ -192,9 +192,9 @@ static void network_alloc_rx_buffers(struct net_device *dev)
         skb = dev_alloc_skb(RX_BUF_SIZE);
         if ( skb == NULL ) break;
         skb->dev = dev;
-        skb_reserve(skb, 2); /* word align the IP header */
+        //skb_reserve(skb, 2); /* word align the IP header */
         np->rx_skb_ring[i] = skb;
-        np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
+        np->net_ring->rx_ring[i].addr = (unsigned long)skb->net_page->ppte; //data;
         np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
     }
 
@@ -276,10 +276,18 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
  again:
     for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
     {
+        if (np->net_ring->rx_ring[i].status != RING_STATUS_OK)
+        {
+                printk("bad buffer on RX ring!(%d)\n", 
+                                np->net_ring->rx_ring[i].status);
+                continue;
+        }
         skb = np->rx_skb_ring[i];
+        
         skb_put(skb, np->net_ring->rx_ring[i].size);
         skb->protocol = eth_type_trans(skb, dev);
         np->stats.rx_packets++;
+
         np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
         netif_rx(skb);
         dev->last_rx = jiffies;
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h
index 250b64fac8..0f097342ba 100644
--- a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h
+++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h
@@ -2,7 +2,7 @@
 #define _ASM_IO_H
 
 #include <linux/config.h>
-
+#include <asm/hypervisor.h>
 /*
  * This file contains the definitions for the x86 IO instructions
  * inb/inw/inl/outb/outw/outl and the "string versions" of the same
@@ -73,6 +73,22 @@ static inline void * phys_to_virt(unsigned long address)
 	return __va(address);
 }
 
+/*
+ * Virtual <-> machine address conversion: a machine address is the
+ * physical (__pa) address offset by start_info.phys_base.
+ */
+static inline unsigned long virt_to_mach(volatile void * address)
+{
+       return __pa(address) + (unsigned long) start_info.phys_base;
+}
+
+/* Inverse of virt_to_mach(): strip phys_base, then translate with __va(). */
+static inline void *mach_to_virt(unsigned long address)
+{
+        return __va(address - (unsigned long) start_info.phys_base);
+}
+
+
 /*
  * Change "struct page" to physical address.
  */
diff --git a/xenolinux-2.4.16-sparse/net/core/skbuff.c b/xenolinux-2.4.16-sparse/net/core/skbuff.c
index e6891de566..ec76f00de0 100644
--- a/xenolinux-2.4.16-sparse/net/core/skbuff.c
+++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c
@@ -59,7 +59,7 @@
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <net/sock.h>
-
+#include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
@@ -246,19 +246,17 @@ void init_net_pages(unsigned long order_pages)
         {
                 np = net_page_table + i;
                 np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE);
-                
+
                 // now fill the pte pointer:
                 np->ppte = 0xdeadbeef;
                 pgd = pgd_offset_k(np->virt_addr);
-                if (!pgd_none(*pgd))
-                {
-                    pmd = pmd_offset(pgd, np->virt_addr);
-                    if (!pmd_none(*pmd))
-                    {
-                            ptep = pte_offset(pmd, np->virt_addr);
-                            np->ppte = (unsigned long)ptep; // neet to virt_to_phys this?
-                    }
-                }
+                if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG();
+
+                /* The pmd must be looked up before it is tested or walked. */
+                pmd = pmd_offset(pgd, np->virt_addr);
+                if (pmd_none(*pmd) || pmd_bad(*pmd)) BUG();
+                ptep = pte_offset(pmd, np->virt_addr);
+                np->ppte = (unsigned long)virt_to_mach(ptep);
 
                 list_add_tail(&np->list, &net_page_list);
         }
@@ -297,10 +295,11 @@ void free_net_page(struct net_page_info *np)
     
     spin_lock_irqsave(&net_page_list_lock, flags);
     
-    list_add_tail(&np->list, &net_page_list);
+    list_add(&np->list, &net_page_list);
     net_pages++;
 
     spin_unlock_irqrestore(&net_page_list_lock, flags);
+
 }
 
 struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
@@ -427,12 +426,14 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 
 static void skb_release_data(struct sk_buff *skb)
 {
-	if (!skb->cloned ||
+        if (!skb->cloned ||
 	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
 		if (skb_shinfo(skb)->nr_frags) {
 			int i;
-			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 
+{
 				put_page(skb_shinfo(skb)->frags[i].page);
+}
 		}
 
 		if (skb_shinfo(skb)->frag_list)
@@ -445,6 +446,7 @@ static void skb_release_data(struct sk_buff *skb)
                     free_net_page(skb->net_page);
                 }
 	}
+
 }
 
 /*
-- 
2.30.2